Online-Academy
Look, Read, Understand, Apply

Simple Stats

import pandas as pd
import numpy as np
from scipy.stats import zscore
# Build a small, reproducible synthetic customer dataset and print basic
# descriptive statistics, outlier counts and a correlation matrix for it.
np.random.seed(42)  # fixed seed so every run prints the same numbers
n_samples = 10

# The random draws are consumed in the order age -> income ->
# purchase_amount -> purchase_frequency; reordering the entries would change
# the generated values.
df = pd.DataFrame({
    'customer_id': range(1, n_samples + 1),
    'age': np.random.normal(35, 10, n_samples).astype(int),
    'income': np.random.normal(50000, 15000, n_samples).astype(int),
    'purchase_amount': np.random.gamma(2, 50, n_samples).astype(int),
    'purchase_frequency': np.random.poisson(3, n_samples),
})
print(df)

# Summary statistics and the median of the three columns of interest.
summary_cols = df[['age', 'income', 'purchase_amount']]
print(summary_cols.describe())
print(summary_cols.quantile(0.5))

# Skewness and Kurtosis
print("Skewness:\n", summary_cols.skew())
print("\nKurtosis:\n", summary_cols.kurtosis())

# Quartiles and IQR of the purchase amount (q2 is the median).
q1, q2, q3 = df['purchase_amount'].quantile([0.25, 0.5, 0.75])
iqr = q3 - q1
print(f"\nPurchase Amount - Q1: Rs.{q1:.2f}, Q3: Rs.{q3:.2f}, IQR: Rs.{iqr:.2f}")

# Outlier detection using Z-score
Z_THRESHOLD = 3
df['purchase_zscore'] = zscore(df['purchase_amount'])
outliers = df[df['purchase_zscore'].abs() > Z_THRESHOLD]
print(f"Number of purchase outliers: {len(outliers)}")

# NOTE: zscore() uses the population standard deviation (ddof=0) by default,
# and with n observations no |z| can exceed sqrt(n - 1) (Samuelson's
# inequality). For n_samples = 10 that bound is exactly 3, so the z-score
# check above cannot flag anything on a sample this small. The 1.5 * IQR
# fence rule below has no such limitation and reuses the quartiles computed
# above.
IQR_FENCE_FACTOR = 1.5
lower_fence = q1 - IQR_FENCE_FACTOR * iqr
upper_fence = q3 + IQR_FENCE_FACTOR * iqr
iqr_outliers = df[
    (df['purchase_amount'] < lower_fence)
    | (df['purchase_amount'] > upper_fence)
]
print(f"Number of purchase outliers (1.5*IQR rule): {len(iqr_outliers)}")

# Pearson correlation matrix (DataFrame.corr() defaults to Pearson).
numeric_cols = ['age', 'income', 'purchase_amount', 'purchase_frequency']
correlation_matrix = df[numeric_cols].corr()
print("Correlation Matrix:\n", correlation_matrix.round(2))